﻿###Power Calculations###
#Freenstra, Janusz, Suong


###Basic Power calculator
power_calculator <- function(mu_t, mu_c, sigma, alpha=0.05, N){ 
  lowertail <- (abs(mu_t - mu_c)*sqrt(N))/(2*sigma) 
  uppertail <- -1*lowertail 
  beta <- pnorm(lowertail- qnorm(1-alpha/2), lower.tail=TRUE) + 1- pnorm(uppertail- qnorm(1-alpha/2), lower.tail=FALSE) 
  return(beta) 
  } 
#Choose values for mu_t, mu_c, sigma and N
mu_t <-65
mu_c <-60
sigma <-10
N<-50

power_calculator <- function(mu_t, mu_c, sigma, alpha=0.05, N)



###Power simulation for standard design###
#Power should exceed .80 as a rule of thumb
#When power =.80  80% of experiments will return a statistically significant result result when there is a true effect


possible.ns <- seq(from=100, to=2000, by=50)     # The sample sizes we'll be considering
powers <- rep(NA, length(possible.ns))           # Empty object to collect simulation estimates
alpha <- 0.05                                    # Standard significance level
sims <- 500                                      # Number of simulations to conduct for each N

#### Outer loop to vary the number of subjects ####
for (j in 1:length(possible.ns)){
  N <- possible.ns[j]                              # Pick the jth value for N
  
  significant.experiments <- rep(NA, sims)         # Empty object to count significant experiments
  
  #### Inner loop to conduct experiments "sims" times over for each N ####
  for (i in 1:sims){
    Y0 <-  rnorm(n=N, mean=60, sd=20)              # control potential outcome
    tau <- 5                                       # Hypothesize treatment effect
    Y1 <- Y0 + tau                                 # treatment potential outcome
    Z.sim <- rbinom(n=N, size=1, prob=.5)          # Do a random assignment
    Y.sim <- Y1*Z.sim + Y0*(1-Z.sim)               # Reveal outcomes according to assignment
    fit.sim <- lm(Y.sim ~ Z.sim)                   # Do analysis (Simple regression)
    p.value <- summary(fit.sim)$coefficients[2,4]  # Extract p-values
    significant.experiments[i] <- (p.value <= alpha) # Determine significance according to p <= 0.05
  }
  
  powers[j] <- mean(significant.experiments)       # store average success rate (power) for each N
}
#Choose values for tau, mean, sd

plot(possible.ns, powers, ylim=c(0,1))





###Power Simulation w/ covariate control###
#Including covariates as regressiors improves precision by reducing noise

rm(list=ls())
possible.ns <- seq(from=100, to=2000, by=50)
powers <- rep(NA, length(possible.ns))
powers.cov <- rep(NA, length(possible.ns))        # Need a second empty vector
alpha <- 0.05
sims <- 500
for (j in 1:length(possible.ns)){
  N <- possible.ns[j]
  
  significant.experiments <- rep(NA, sims)
  significant.experiments.cov <- rep(NA, sims)      # Need a second empty vector here too
  
  for (i in 1:sims){
    gender <- c(rep("F", N/2), rep("M", N/2))       # Generate "gender" covariate
    age <- sample(x=18:65, size=N, replace=TRUE)    # Generate "age" covariate
    effectofgender <- 10                            # Hypothesize the "effect" of gender on income
    effectofage <- 2                                # Hypothesize the "effect" of age on income
    
    ## Hypothesize Control Outcome as a function of gender, age, and error
    Y0 <- effectofgender*(gender=="M") + effectofage*age + rnorm(n=N, mean=100, sd=20)
    
    ## This is all the same ##
    tau <- 5
    Y1 <- Y0 + tau
    Z.sim <- rbinom(n=N, size=1, prob=.5)
    Y.sim <- Y1*Z.sim + Y0*(1-Z.sim)
    fit.sim <- lm(Y.sim ~ Z.sim)
    
    ## This is the novel analysis -- including two covariates to increase precision ##
    fit.sim.cov <- lm(Y.sim ~ Z.sim + (gender=="M") + age)
    
    ## extract p-values and calculate significance ##
    p.value <- summary(fit.sim)$coefficients[2,4]
    p.value.cov <- summary(fit.sim.cov)$coefficients[2,4]
    significant.experiments[i] <- (p.value <= alpha)
    significant.experiments.cov[i] <- (p.value.cov <= alpha)
  }
  
  powers[j] <- mean(significant.experiments)
  powers.cov[j] <- mean(significant.experiments.cov)
}

plot(possible.ns, powers, ylim=c(0,1))
points(possible.ns, powers.cov, col="red")



###Power Calculation for Conjoint Experiment###

#Rule of thumb
#https://www.sawtoothsoftware.com/download/techpap/samplesz.pdf
N<-500c/(t*a) 

#t= Number of Tasks
#a= # alternatives
#c= largest number of levels or largest product of levels of any 2 attributes if you are interested in interactions




#Example based on Bekker-Grob et al. (2015)
http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4575371/
#1
test_alpha=0.05                                									    #Standard significance level
z_one_minus_alpha<-qnorm(1-test_alpha)												#1 tailed, divide tes_alpha by 2 if you want to run a 2 tailed test)

#2
test_beta=0.2																		#Standard statistical power 80% (i.e., 1-B=.20)
z_one_minus_beta<-qnorm(1-test_beta)			

#3
#Choose model-->This affects the way the Asymptotic variance-covariance matrix (AVC) needs to be calculated


#4
parameters<-c(1.23 , -0.31 , -0.21 , -0.44 , 0.028 , -1.10 , -0.04 , -0.0015)   		#Initial beliefs about parameter values
#parameters (constant, B1, B2, B3, C, D, E, F)											#(2 categorical attributes and 3 linear attributes)						

#5 Design
																	
ncoefficients=8																			#Number of parameters to estimate	
nalts=3																					#Number of alternatives
nchoices=16																				#Numbers of choices

# load the design information															#one row for each alternative (so nalts *nchoices rows)
design<-as.matrix(read.table("YOUR MATRIX".txt",header=FALSE));

#6
#compute the information matrix
info_mat=matrix(rep(0,ncoefficients*ncoefficients), ncoefficients, ncoefficients)   	# initialize a matrix of size ncoefficients by ncoefficients filled with zeros.
exputilities=exp(design%*%parameters)                                               	# compute exp(design matrix times initial parameter values) 
for (k_set in 1:nchoices) {																# loop over all choice sets
alternatives=((k_set-1)*nalts+1) : (k_set*nalts)										# select alternatives in the choice set
p_set=exputilities[alternatives]/sum(exputilities[alternatives])						# obtain vector of choice shares within the choice set
p_diag=diag(p_set)																		# also put these probabilities on the diagonal of a matrix that only contains zeros
middle_term<-p_diag-p_set%o%p_set														# compute middle term P-pp’
full_term<-t(design[alternatives,])%*%middle_term%*%design[alternatives,]				# pre- and postmultiply with the Xs from the design matrix for the alternatives in this choice set
info_mat<-info_mat+full_term 															# Add contribution of this choice set to the information matrix
}																						# end of loop over choice sets
sigma_beta<-solve(info_mat,diag(ncoefficients)) 										#get the inverse of the information matrix (i.e., gets the variance-covariance matrix)


#7
#Calculate N
effectsize<-parameters																	# Use the parameter values as effect size. Other values can be used here.
N<-((z_one_minus_beta + z_one_minus_alpha)*sqrt(diag(sigma_beta))/abs(effectsize))^2	# formula for sample size calculation is n>[(z_(beta)+z_(1-alpha))*sqrt(Σγκ)/delta]^2 
N																						# Display results (required sample size for each coefficient)

























